import sys 
import numpy as np
import matplotlib.pyplot as plt 
sys.path.append('..')
from common.util import create_co_matrix, ppmi, preprocess


# Demo: turn one sentence into a co-occurrence matrix, reweight it with
# ppmi(), factorize the result with SVD, and plot the first two columns
# of U as 2-D coordinates for each vocabulary entry.
text = "You say goodbye and I say hello."
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(id_to_word)

# Presumably counts neighbours within one position on each side
# (window_size=1) -- confirm against common.util.create_co_matrix.
C = create_co_matrix(corpus, vocab_size, window_size=1)
W = ppmi(C)

# Full SVD of the reweighted matrix.
# NOTE: np.linalg.svd returns the third factor already transposed (Vh);
# it is bound to `V` here and is not used below.
U, S, V = np.linalg.svd(W)

# Show row 0 of each matrix (raw counts, ppmi() output, SVD left vectors)
# with three decimal places.
np.set_printoptions(precision=3)
for matrix in (C, W, U):
    print(matrix[0])

# Scatter the first two columns of U and label every point with its word.
xs = U[:, 0]
ys = U[:, 1]
for word, word_id in word_to_id.items():
    plt.annotate(word, (xs[word_id], ys[word_id]))
plt.scatter(xs, ys, alpha=0.5)
plt.show()